
* ---------------------------------------------------------------
* Session setup: empty memory, disable paging, close any open log,
* fix the random-number seed and raise matrix / variable limits.
* ---------------------------------------------------------------
clear
set more off
capture log close
set seed 12345
set matsize 10000
set maxvar 20000
* The global DIR is not defined in this file and must be set by the caller.
* Stop early with a clear message if it is empty; otherwise the path
* globals below would resolve to root-relative locations such as "/Output".
if `"$DIR"' == "" {
	display as error "global DIR is not set; define it before running this do-file"
	exit 198
}
* set globals: output folder, data folder, and working directory
global OUTPUT "$DIR/Output"
global DATA "$DIR"
cd "$DIR"

/* SET LOCALS */
* first and last tax year of the event-time window (used to loop over years)
local yrb 2008
local yre 2016
* excluded base year for event-time dummies and control interactions
local baseyr 2012
* log-or-level switch ("ln" or "level") and the label derived from it
local rlog ln
local rloglab `rlog'

* year used for the DD comparison (also selects which years are kept below)
local ddyear 2016


**
** prep top firm indicator
**
* Load the firm-level regression file and keep only the merge keys
* (firm_tin, year) plus the t1_2012 flag, then store it in a tempfile
* so it can be merged onto the individual-level data.
* Paths are quoted so that directories containing spaces still work.
use "$DATA/reg_file_firm.dta", clear

keep firm_tin year t1_2012
sort firm_tin year
tempfile tt1
save "`tt1'", replace

****************************
**  Prep individual data
****************************
* Load the individual-level file (path quoted to tolerate spaces).
use "$DATA/scorp100p_entsep_file", clear

* Keep 2011, 2012, the DD year and the year before the DD year.
local yy "`ddyear'"
keep if inlist(year, 2011, 2012, `yy'-1, `yy')

* Attach the firm-level t1_2012 flag; keep only firm-years found in both files.
sort firm_tin year
merge m:1 firm_tin year using "`tt1'"
tab year _merge
keep if _merge==3
drop _merge
sort firm_tin tin year

* Wage deciles of the 2012 earnings distribution, computed only over
* observations with ft==1, year==2012 and t1_2012==1. The helper variable
* xq is left in memory; wages_q10 is missing outside that sample.
gen wages_q10 = .
capture drop xq
xtile xq = wages if ft==1 & year==2012 & t1_2012==1, nquantiles(10)
replace wages_q10 = xq if ft==1 & year==2012 & t1_2012==1

* Detailed summary of wages within each decile, then for the whole sample
* the deciles were computed on.
forvalues d = 1/10 {
	sum wages if wages_q10==`d', detail
}
sum wages if ft==1 & year==2012 & t1_2012==1, detail

* Assign each observation to one of 12 wage bins using fixed cutoffs.
* Bin 1 is wages below the first cutoff; bin k+1 is wages at or above the
* k-th cutoff (later replaces overwrite earlier ones).
* NOTE(review): the cutoffs are hard-coded; presumably they were read off the
* decile summaries printed above - confirm before changing the sample.
gen wagecat = 1 if wages<5870
local bin = 1
foreach cut of numlist 5870 8760 12610 17340 22900 29380 37130 47950 67590 89640 160830 {
	local ++bin
	replace wagecat = `bin' if wages>=`cut'
}
* Missing values compare greater than any number, so they reach the top bin
* above. Reset every kind of missing (system and extended) back to missing.
replace wagecat = . if missing(wages)
tab wagecat if ft==1 & year==2012 & t1_2012==1
* number of wage bins, used by the loops that build per-bin variables
local wc "12"

* Wages counted only when ft==1 (zero otherwise), plus firm-year totals of
* all wages and of ft==1 wages.
gen wages_ft = cond(ft==1, wages, 0)
bysort firm_tin year: egen wages_tot = sum(wages)
bysort firm_tin year: egen wages_ft_tot = sum(wages_ft)

****
** Create totals file
****
* Collapse to one row per firm-year-wage bin:
*   n_cat    = count of non-missing tin
*   n_catft  = sum of ft
*   wages, wages_ft = sums within the bin
*   wages_tot, wages_ft_tot = means (these are constant within firm-year)
keep tin ft wages wages_ft wages_tot wages_ft_tot firm_tin year wagecat
collapse (count) n_cat=tin (sum) n_catft=ft wages wages_ft ///
	(mean) wages_tot wages_ft_tot, by(firm_tin year wagecat)

* Spread the per-bin rows into columns: for each statistic and each wage bin,
* create a firm-year constant holding that bin's value (0 when the firm-year
* has no row for the bin).
sort firm_tin year
foreach stat in wages_ft n_catft {
	forvalues c = 1/`wc' {
		gen zz = cond(wagecat==`c', `stat', 0)
		by firm_tin year: egen `stat'`c' = sum(zz)
		drop zz
	}
}
* Reduce to one row per firm-year (the first row of each group is kept).
bysort firm_tin year: gen nf = (_n==1)
keep if nf==1

* Carry each firm's 2012 values to all of its rows: per-bin ft wage bills
* (suffix _2012) and the total ft wage bill (wages_ft_tot2012).
* A firm with no 2012 row gets 0.
sort firm_tin year
foreach stat in wages_ft {
	forvalues c = 1/`wc' {
		gen zz = cond(year==2012, `stat'`c', 0)
		by firm_tin: egen `stat'`c'_2012 = sum(zz)
		drop zz
	}
	gen zz = cond(year==2012, `stat'_tot, 0)
	by firm_tin: egen `stat'_tot2012 = sum(zz)
	drop zz
}
* the bin-level sums left over from the collapse are no longer needed
drop wages wages_ft

* Merge the firm-year wage-bin file onto the firm-level regression file and
* keep only firm-years present in both. Path quoted to tolerate spaces.
sort firm_tin year
merge 1:1 firm_tin year using "$DATA/reg_file_firm.dta"
tab year _merge
keep if _merge==3
drop _merge

* Pre-tax total including owners:
*   owntot  = tot_own_wages + netinc
*   tot_pre = ft wage bill + owntot
gen owntot = tot_own_wages+netinc
gen tot_pre = wages_ft_tot + owntot

* pre-tax share of each wage bin, and of owners (suffix 99)
forvalues c = 1/12 {
	gen pre_sh`c' = wages_ft`c'/tot_pre
}
gen pre_sh99 = owntot/tot_pre

* Owner income net of tax liability; left at the gross amount when owntot
* is zero or negative.
gen owntot_net = cond(owntot<=0, owntot, owntot-liab_tot)
* adjust for reasonable avg tax rate for outliers: when liability exceeds
* 40 percent of owntot, net income is set to 60 percent of owntot
gen azz=liab_tot/owntot
replace owntot_net = owntot*.6 if azz>.4 & !missing(azz)

* IV version: actual net income through 2012; afterwards owntot minus
* (liab_tot_2012 + dliab_tot_iv_2012). Gross amount when owntot<=0.
gen owntot_net_iv = cond(year<=2012, owntot_net, owntot-(liab_tot_2012+dliab_tot_iv_2012))
replace owntot_net_iv = owntot if owntot<=0

* net = net owner plus wage bill; owner share of that total
gen tot_net_iv = wages_ft_tot + owntot_net_iv
gen net_sh99_iv = owntot_net_iv / tot_net_iv

* wage-bin shares of the net total
forvalues c = 1/12 {
	gen net_sh`c'_iv = wages_ft`c'/tot_net_iv
}

* Winsorize owner shares within year at the w-th and (100-w)-th percentiles.
* The result is stored in a new variable named w + original name + top
* percentile (here: wpre_sh9999 and wnet_sh99_iv99); missing stays missing.
foreach w in 1 {
	local ptop = 100-`w'
	local pbot = `w'
	foreach share in pre_sh99 net_sh99_iv {
		bysort year (`share'): egen zt = pctile(`share'), p(`ptop')
		by year: egen zb = pctile(`share'), p(`pbot')
		* clip to [zb, zt]; the lower bound takes precedence, as in a
		* sequence of top-then-bottom replacements
		gen w`share'`ptop' = cond(`share'<zb, zb, cond(`share'>zt, zt, `share')) if `share' != .
		drop zt zb
	}
}

* For each listed variable create a firm-level constant <name>_2012 holding
* its 2012 value. It is 0 for firms with no 2012 row, and missing when the
* 2012 value itself is missing.
foreach v in wages_ft_tot owntot_net owntot_net_iv tot_net_iv tot_pre owntot tot_own_wages {
	* spread the 2012 value across all rows of the firm
	gen vv = cond(year==2012, `v', 0)
	bysort firm_tin: egen `v'_2012 = sum(vv)
	drop vv
	* a missing 2012 value would otherwise show up as 0; blank it instead
	gen vv = (`v'==. & year==2012)
	by firm_tin: egen zmiss = sum(vv)
	replace `v'_2012 = . if zmiss==1
	drop vv zmiss
}
* check which years have missing owner totals / missing 2012 baselines
tab year if owntot==.
tab year if owntot_2012==.

* number of wage bins
local wc "12"
* percent change surplus: change in each bin's ft wage bill relative to its
* 2012 level, scaled by the firm's 2012 net total (tot_net_iv_2012)
forvalues c = 1/`wc' {
	gen dptn_wages_ft`c' = (wages_ft`c' - wages_ft`c'_2012) / tot_net_iv_2012
}

* deciles of 2012 owner income, computed over all rows in memory
local pinc "owntot_2012"
xtile owntot_2012cat = owntot_2012, nquantiles(10)

* trim outlier wage bills for regression (can't winsorize or components won't sum)
sum wages_ft_tot if year==2012, detail
sum wages_ft_tot if year==2016, detail
* tt flags firm-years above a year-specific cap (2012 and 2016 only).
* NOTE(review): the caps are hard-coded; presumably taken from the summaries
* printed just above - confirm if the sample changes.
gen tt = (wages_ft_tot>1.02e7 & year==2012) | (wages_ft_tot>1.28e7 & year==2016)
tab year tt

* create bins for deciles for main regs:
* bins 1-9 map one-to-one to wage bins 1-9; bin 10 pools wage bins 10-12
forvalues d = 1/9 {
	gen dec`d' = wages_ft`d'
	gen dec`d'_2012 = wages_ft`d'_2012
}
egen dec10 = rowtotal(wages_ft10 wages_ft11 wages_ft12)
egen dec10_2012 = rowtotal(wages_ft10_2012 wages_ft11_2012 wages_ft12_2012)

* change in each decile bin relative to 2012, scaled by the 2012 net total
forvalues d = 1/10 {
	gen dptn_dec`d' = (dec`d' - dec`d'_2012) / tot_net_iv_2012
}

* save file for when do stayer-turnover decomp (3.1_appendix_distrib_results.do)
* Paths are quoted so that a data directory containing spaces still works.
sort firm_tin year
compress
save "$DATA/wage_distrib_all.dta", replace


* reload the saved file so the sections below can also be run from here
use "$DATA/wage_distrib_all.dta", clear


*************************************
**  Figure 5 Panel B - baseline distrib
*************************************
* Weighted mean (weights: tot_pre_2012) of every variable matching net_sh*
* in 2012, for firms with tt1112==1, positive non-missing tot_pre_2012 and
* n_ft_lower_2012 of at least 15. The result is written to CSV.
* NOTE(review): the output file name says "pre" but the collapsed variables
* are the net_sh* shares, not pre_sh* - confirm this is intended.
* NOTE(review): missing values compare greater than any number, so rows with
* missing n_ft_lower_2012 also pass the >=15 filter - confirm there are none.
preserve
	keep if tot_pre_2012>0 & tot_pre_2012~=. & n_ft_lower_2012>=15 & year==2012 & tt1112==1
	collapse (mean) net_sh*  [aw=tot_pre_2012]
	* path quoted so an output directory containing spaces still works
	outsheet using "$OUTPUT/figure5_panelB_distrib_pre.csv", comma replace
restore


*************************************
**  Figure 5 Panel A - earnings decom
*************************************
* Control set r1: baseline category variables, each fully interacted with
* year indicators (base year omitted).
local r1 "i.valaddpw_ft_`baseyr'cat##ib`baseyr'.year i.owntot_2012cat##ib2012.year i.naics_`baseyr'##ib`baseyr'.year i.state_2012##ib`baseyr'.year"

* Rebuild the event-time dummies for every year in the window: equal to the
* treatment flag in that year, 0 in other years, missing when the flag is
* missing. capture makes the drop a no-op if a dummy does not exist yet
* instead of aborting the do-file.
local t "tt1112"
forval y = `yrb' / `yre' {
	capture drop t`y'
	gen t`y' = 0
	replace t`y' = . if `t'==.
	replace t`y' = `t' if year==`y'
}
local events "t2011 t2015 t2016 y2011 y2015 y2016 `t'"

* share of surplus for firms w/ at least 15 workers
* Firm fixed effects are absorbed, standard errors are clustered by firm and
* observations are weighted by tot_pre_2012. Only the top-bin regression
* (dec10) additionally excludes trimmed firm-years (tt==0).
* NOTE(review): wnet_sh99_iv is not created in this file under that exact
* name; the winsorized variable built above is wnet_sh99_iv99, so this relies
* on variable abbreviation (or on a same-named variable from the merged firm
* file) - confirm, and note it fails under "set varabbrev off".
foreach r in 1 {
	forval j=1/9 {
		eststo r`r'_w`j': quietly areg dptn_dec`j' `events' `r`r''  [aw=tot_pre_2012] if tot_pre_2012>0 & n_ft_lower_2012>=15 , absorb(firm_tin) vce(cluster firm_tin)
	}
	eststo r`r'_w10: quietly areg dptn_dec10 `events' `r`r''  [aw=tot_pre_2012] if tot_pre_2012>0 & tt==0 & n_ft_lower_2012>=15, absorb(firm_tin) vce(cluster firm_tin)
	eststo r`r'_wsh99: quietly areg wnet_sh99_iv `events' `r`r''  [aw=tot_pre_2012] if tot_pre_2012>0 & n_ft_lower_2012>=15, absorb(firm_tin) vce(cluster firm_tin)
}
* export only the 2016 event coefficient from every stored model
esttab using "$OUTPUT/figure5_panelA.csv", replace se ar2 mtitles( ) keep(t2016)
eststo clear



*************************************
**  Appendix Figure A.6 - Panel C - earnings decom DD
*************************************
* indicator for firm having some value in that wage bin:
* ind<j> is 1 on every row of a firm whose 2016 change in bin j is non-zero.
* Missing values satisfy "!=0", so they are excluded explicitly; otherwise a
* firm with a missing change would be flagged as having a value.
* bysort is used so the step does not depend on sort order left by earlier code.
forval j = 1/12 {
	gen vv = (dptn_wages_ft`j'!=0 & !missing(dptn_wages_ft`j') & year==2016)
	bysort firm_tin: egen ind`j'=sum(vv)
	tab year ind`j'
	drop vv
}

* same indicator for the pooled top decile bin (dec10)
local j "10"
gen vv = (dptn_dec`j'!=0 & !missing(dptn_dec`j') & year==2016)
bysort firm_tin: egen zind`j'=sum(vv)
tab year zind`j'
drop vv

* One regression per wage bin, restricted to firms flagged above; same
* controls, weights, absorbed firm effects and clustering as the main
* specification. The last model repeats dec10 on its own flagged sample.
foreach r in 1 {
	forval j=1/12 {
		eststo r`r'_w`j': quietly areg dptn_wages_ft`j' `events' `r`r''  [aw=tot_pre_2012] if tot_pre_2012>0 & ind`j'==1, absorb(firm_tin) vce(cluster firm_tin)
	}
	eststo r`r'_zw10: quietly areg dptn_dec10 `events' `r`r''  [aw=tot_pre_2012] if tot_pre_2012>0 & zind10==1, absorb(firm_tin) vce(cluster firm_tin)
}
* export only the 2016 event coefficient from every stored model
esttab using "$OUTPUT/figure_A6_panelC.csv", replace se ar2 mtitles( ) keep(t2016) 
eststo clear








* end of do-file: empty memory and stop execution
clear
exit


